# @author:Wei Junjie
# @time:2024/5/25 11:05
# @file MovieSky.py
# --*--coding: utf-8 --*--
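"""
Movie Heaven (dy2018.com) scraping example.

Fetches the site's home page and prints the movie entries found on it.
"""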
import csv
import re

# 1. Import the required packages
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from pprint import pprint
from requests import Session

# 2. Constants
BASE_URL = 'https://dy2018.com/'


# 3. Fetch the home page content
def homepage_view(url, timeout=10):
    """Fetch ``url`` with a GET request and return the response.

    The request carries a fixed ``guardok`` cookie and a desktop-browser
    ``user-agent`` header.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The ``requests.Response`` object, with its ``encoding`` overridden to
        ``'gb2312'`` so that ``response.text`` is decoded with that codec.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when ``timeout`` elapses.
    """
    # NOTE(review): this 'guardok' value looks like a site-issued token copied
    # from a browser session; presumably it can expire -- confirm.
    cookies = {
        'guardok': 'BcTEcsqHHSF3r6SVkfQTE4RBzmrpCtQptaqU6ySYiMeTG7BjgdRxTHZbSOy2P+s4EeP+5LGmmKhYnbDFjKDSYQ==',
    }
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    }

    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    response = requests.get(url, cookies=cookies, headers=headers, timeout=timeout)
    # Override the encoding chosen by requests so the text is decoded with the
    # charset the page itself uses.
    response.encoding = 'gb2312'

    return response
def detail_view(response):
    """Extract movie entries from a fetched page.

    Every ``<li>`` element whose first link has an ``href`` starting with
    ``/i`` and which also contains a ``<span>`` is treated as a movie entry.
    List items without a link, without an ``href`` or without a ``<span>`` are
    skipped instead of raising.

    Args:
        response: Object exposing the page HTML through a ``text`` attribute,
            such as the value returned by ``homepage_view``.

    Returns:
        A list of ``(date, url, title)`` tuples: the ``<span>`` text prefixed
        with ``'2024-'``, the absolute detail-page URL, and the link's
        ``title`` attribute (falling back to the link text when the attribute
        is missing).
    """
    soup = BeautifulSoup(response.text, 'html.parser')
    # BASE_URL ends with '/' and the hrefs start with '/', so plain
    # concatenation would produce 'https://dy2018.com//i/...'.
    site_root = BASE_URL.rstrip('/')
    movie_list = []
    for item in soup.find_all('li'):
        link = item.a
        if link is None or item.span is None:
            continue
        href = link.get('href', '')
        if not href.startswith('/i'):
            continue
        title = link.get('title') or link.get_text(strip=True)
        # NOTE(review): the year is hard-coded; presumably the <span> holds
        # only month and day -- confirm and derive the year if needed.
        movie_list.append(('2024-' + item.span.text, site_root + href, title))
    return movie_list


def main():
    """Download the home page and print each extracted movie entry."""
    home_page = homepage_view(BASE_URL)
    for movie in detail_view(home_page):
        print(movie)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


